#https://data.cdc.gov/NCHS/Weekly-Counts-of-Deaths-by-State-and-Select-Causes/muzy-jte6/data
# loading packages
library(tidyverse)
library(RSocrata)
library(hablar)
library(geofacet)
library(lubridate)
library(janitor)
weeks <- read_csv("Data/weeks.csv")
weeks <- weeks %>%
mutate(start_date=dmy(start_date)-1,
end_date=dmy(end_date)-1)
# importing cdc cause specific death data for 2014-2018, grabbing columns for cause, omitting flag columns. Full list here: https://dev.socrata.com/foundry/data.cdc.gov/3yf8-kanr
causes_1418 <- read_csv(
"https://data.cdc.gov/api/views/3yf8-kanr/rows.csv?accessType=DOWNLOAD") %>%
data.frame() %>%
clean_names() %>%
select(jurisdiction_of_occurrence,
mmwryear=mmwr_year, mmwrweek=mmwr_week, weekendingdate=week_ending_date,
allcause=all_cause,naturalcause=natural_cause,
malignant_neoplasms_c00_c97, alzheimer_disease_g30,
chronic_lower_respiratory=chronic_lower_respiratory_diseases_j40_j47 , symptoms_signs_and_abnormal=symptoms_signs_and_abnormal_clinical_and_laboratory_findings_not_elsewhere_classified_r00_r99,
diseases_of_heart_i00_i09=diseases_of_heart_i00_i09_i11_i13_i20_i51, cerebrovascular_diseases=cerebrovascular_diseases_i60_i69 ,
septicemia_a40_a41, diabetes_mellitus_e10_e14,
influenza_and_pneumonia_j10=influenza_and_pneumonia_j10_j18,
other_diseases_of_respiratory=other_diseases_of_respiratory_system_j00_j06_j30_j39_j67_j70_j98,
nephritis_nephrotic_syndrome=nephritis_nephrotic_syndrome_and_nephrosis_n00_n07_n17_n19_n25_n27) %>%
mutate(weekendingdate=mdy(weekendingdate),
covid19_multiple_causes_of_death=NA,
covid19_underlying_cause_of_death=NA,
mmwryear=as.numeric(mmwryear),
mmwrweek=as.numeric(mmwrweek))
# importing cdc cause specific death data for 2019-2020
#causes_1920 <- read.socrata(
# "https://data.cdc.gov/resource/muzy-jte6.json?$select=jurisdiction_of_occurrence,mmwryear,mmwrweek,weekendingdate,allcause,naturalcause,septicemia_a40_a41,malignant_neoplasms_c00_c97,diabetes_mellitus_e10_e14,alzheimer_disease_g30,influenza_and_pneumonia_j10,chronic_lower_respiratory,other_diseases_of_respiratory,nephritis_nephrotic_syndrome,symptoms_signs_and_abnormal,diseases_of_heart_i00_i09,cerebrovascular_diseases"
#)
#cause_names <- colnames(causes_1418)
library(stringr)
library(lubridate)
library(jsonlite)
library(janitor)
causes_1920 <- read_csv("https://data.cdc.gov/api/views/muzy-jte6/rows.csv?accessType=DOWNLOAD") %>%
data.frame() %>%
clean_names() %>%
select(jurisdiction_of_occurrence,
mmwryear=mmwr_year, mmwrweek=mmwr_week, weekendingdate=week_ending_date,
allcause=all_cause,naturalcause=natural_cause,
malignant_neoplasms_c00_c97, alzheimer_disease_g30,
chronic_lower_respiratory=chronic_lower_respiratory_diseases_j40_j47 , symptoms_signs_and_abnormal=symptoms_signs_and_abnormal_clinical_and_laboratory_findings_not_elsewhere_classified_r00_r99,
diseases_of_heart_i00_i09=diseases_of_heart_i00_i09_i11_i13_i20_i51, cerebrovascular_diseases=cerebrovascular_diseases_i60_i69 ,
septicemia_a40_a41, diabetes_mellitus_e10_e14,
influenza_and_pneumonia_j10=influenza_and_pneumonia_j10_j18,
other_diseases_of_respiratory=other_diseases_of_respiratory_system_j00_j06_j30_j39_j67_j70_j98,
nephritis_nephrotic_syndrome=nephritis_nephrotic_syndrome_and_nephrosis_n00_n07_n17_n19_n25_n27,
covid19_multiple_causes_of_death=covid_19_u071_multiple_cause_of_death,
covid19_underlying_cause_of_death=covid_19_u071_underlying_cause_of_death) %>%
mutate(weekendingdate=mdy(weekendingdate))
# append the 19-20 data to the older data
causes_1420 <- bind_rows(causes_1920, causes_1418)
# using hablar package's 'convert' to convert cause of death columns from character type to doubles to perform calculations
causes_1420 <- causes_1420 %>%
convert(dbl(allcause,
naturalcause,
septicemia_a40_a41,
malignant_neoplasms_c00_c97,
diabetes_mellitus_e10_e14,
alzheimer_disease_g30,
influenza_and_pneumonia_j10,
chronic_lower_respiratory,
other_diseases_of_respiratory,
nephritis_nephrotic_syndrome,
symptoms_signs_and_abnormal,
diseases_of_heart_i00_i09,
cerebrovascular_diseases,
covid19_multiple_causes_of_death,
covid19_underlying_cause_of_death
))
# calculate unnatural causes of death
causes_1420 <- causes_1420 %>%
mutate(unnaturalcause = allcause - naturalcause)
# dropping Puerto Rico to count only jurisdictions in 50 states, per CDC guidance
causes_1420 <- causes_1420 %>%
filter(jurisdiction_of_occurrence != "Puerto Rico")
# summing states and grouping data by week
causes_us_1420 <- causes_1420 %>%
group_by(weekendingdate) %>%
summarize(allcause = sum(allcause, na.rm=TRUE),
naturalcause = sum(naturalcause, na.rm=TRUE),
unnaturalcause = sum(unnaturalcause, na.rm=TRUE),
septicemia_a40_a41 = sum(septicemia_a40_a41, na.rm=TRUE),
malignant_neoplasms_c00_c97 = sum(malignant_neoplasms_c00_c97, na.rm=TRUE),
diabetes_mellitus_e10_e14 = sum(diabetes_mellitus_e10_e14, na.rm=TRUE),
alzheimer_disease_g30 = sum(alzheimer_disease_g30, na.rm=TRUE),
influenza_and_pneumonia_j10 = sum(influenza_and_pneumonia_j10, na.rm=TRUE),
chronic_lower_respiratory = sum(chronic_lower_respiratory, na.rm=TRUE),
other_diseases_of_respiratory = sum(other_diseases_of_respiratory, na.rm=TRUE),
nephritis_nephrotic_syndrome = sum(nephritis_nephrotic_syndrome, na.rm=TRUE),
symptoms_signs_and_abnormal = sum(symptoms_signs_and_abnormal, na.rm=TRUE),
diseases_of_heart_i00_i09 = sum(diseases_of_heart_i00_i09,na.rm=TRUE),
cerebrovascular_diseases = sum(cerebrovascular_diseases, na.rm=TRUE),
covid19_multiple_causes_of_death=sum(covid19_multiple_causes_of_death, na.rm=T),
covid19_underlying_cause_of_death=sum(covid19_underlying_cause_of_death, na.rm=T)) %>%
mutate(naturalcause_perc = round(naturalcause/allcause,3),
unnaturalcause_perc = round(unnaturalcause/allcause,3),
septicemia_perc = round(septicemia_a40_a41/allcause,3),
malignant_neoplasms_perc = round(malignant_neoplasms_c00_c97/allcause,3),
diabetes_mellitus_perc = round(diabetes_mellitus_e10_e14/allcause,3),
alzheimer_perc = round(alzheimer_disease_g30/allcause,3),
influenza_pneumonia_perc = round(influenza_and_pneumonia_j10/allcause,3),
chronic_lower_respiratory_perc = round(chronic_lower_respiratory/allcause,3),
other_respiratory_diseases_perc = round(other_diseases_of_respiratory/allcause,3),
nephritis_nephrotic_syndrome_perc = round(nephritis_nephrotic_syndrome/allcause,3),
symptoms_signs_and_abnormal_perc = round(symptoms_signs_and_abnormal/allcause,3),
heart_disease_perc = round(diseases_of_heart_i00_i09/allcause,3),
cerebrovascular_diseases_perc = round(cerebrovascular_diseases/allcause,3),
covid19_multiple_causes_of_death_perc=round(covid19_multiple_causes_of_death/allcause,3),
covid19_underlying_cause_of_death_perc=round(covid19_underlying_cause_of_death/allcause,3)
)
# dropping most recent weeks where data are unreliable
causes_us_1420 <- causes_us_1420 %>%
filter(weekendingdate != '2020-05-09',
weekendingdate != '2020-05-02',
weekendingdate != '2020-04-25')#,
#weekendingdate != '2020-04-18')
de_tall <- causes_us_1420 %>%
pivot_longer(cols=2:ncol(causes_us_1420 ),
names_to="type",
values_to="count") %>%
left_join(weeks, by=c("weekendingdate"="end_date"))
de_tall20 <- de_tall %>%
filter(SEASON=="2019-20") %>%
filter(!grepl("_perc", type))
de_tall <- de_tall %>%
filter(SEASON!="2019-20") %>%
filter(!grepl("_perc", type))
ggplot() +
geom_line(data=de_tall, aes(x=WEEK, y=count, group=SEASON), color="grey", alpha=.7) +
geom_line(data=de_tall20, aes(x=WEEK, y=count, group=SEASON), color="red") +
facet_wrap(~type, scale="free_y") +
labs(title="Specific causes of death")

de_tall <- causes_us_1420 %>%
pivot_longer(cols=2:ncol(causes_us_1420 ),
names_to="type",
values_to="count") %>%
left_join(weeks, by=c("weekendingdate"="end_date"))
de_tall20 <- de_tall %>%
filter(SEASON=="2019-20") %>%
filter(grepl("_perc", type))
de_tall <- de_tall %>%
filter(SEASON!="2019-20") %>%
filter(grepl("_perc", type))
ggplot() +
geom_line(data=de_tall, aes(x=WEEK, y=count, group=SEASON), color="grey", alpha=.7) +
geom_line(data=de_tall20, aes(x=WEEK, y=count, group=SEASON), color="red") +
facet_wrap(~type) +
labs(title="Specific causes of death by percent")

ggplot() +
geom_line(data=de_tall, aes(x=WEEK, y=count, group=SEASON), color="grey", alpha=.7) +
geom_line(data=de_tall20, aes(x=WEEK, y=count, group=SEASON), color="red") +
facet_wrap(~type, scale="free_y") +
labs(title="Specific causes of death by percent (unscaled)")

weeks <- read_csv("Data/weeks.csv")
weeks <- weeks %>%
mutate(start_date=dmy(start_date)-1,
end_date=dmy(end_date)-1)
state.abb <- c(state.abb, "DC")
state.name <- c(state.name, "District of Columbia")
state_names <- data.frame(state.abb, state.name)
causes_1420_long <- causes_1420 %>%
filter(weekendingdate != '2020-05-09',
weekendingdate != '2020-05-02',
weekendingdate != '2020-04-25') %>% #,
#weekendingdate != '2020-04-18')
mutate(covid19_multiple_causes_of_death=as.numeric(covid19_multiple_causes_of_death),
covid19_underlying_cause_of_death=as.numeric(covid19_underlying_cause_of_death)) %>%
ungroup() %>%
pivot_longer(cols=5:ncol(causes_1420),
names_to="type",
values_to="count") %>%
mutate(weekendingdate=ymd(weekendingdate)) %>%
left_join(weeks, by=c("weekendingdate"="end_date")) %>%
filter(WEEK <=15) %>%
left_join(state_names, by=c("jurisdiction_of_occurrence"= "state.name"))
causes_1420_long20 <- causes_1420_long %>%
filter(SEASON=="2019-20")
causes_1420_long <- causes_1420_long %>%
filter(SEASON!="2019-20")
list_of_causes <- causes_1420_long %>%
pull(type) %>% unique()
for (i in 1:length(list_of_causes)) {
df20 <- causes_1420_long20 %>%
filter(type==list_of_causes[i]) %>%
filter(!is.na(state.abb))
df <- causes_1420_long %>%
filter(type==list_of_causes[i]) %>%
filter(!is.na(state.abb))
plotted <- ggplot(df20) +
geom_line(data=df, aes(x=WEEK, y=count, group=SEASON), color="grey", alpha=.7) +
geom_line(aes(x=WEEK, y=count), color="red") +
facet_geo(~state.abb, grid="us_state_grid1", scale="free_y")+
labs(title=list_of_causes[i])
print(plotted)
}
















causes_1420_long <- causes_1420 %>%
filter(weekendingdate != '2020-05-09',
weekendingdate != '2020-05-02',
weekendingdate != '2020-04-25') %>% #,
#weekendingdate != '2020-04-18')
mutate(covid19_multiple_causes_of_death=as.numeric(covid19_multiple_causes_of_death),
covid19_underlying_cause_of_death=as.numeric(covid19_underlying_cause_of_death)) %>%
group_by(jurisdiction_of_occurrence, weekendingdate) %>%
summarize(allcause = sum(allcause, na.rm=TRUE),
naturalcause = sum(naturalcause, na.rm=TRUE),
unnaturalcause = sum(unnaturalcause, na.rm=TRUE),
septicemia_a40_a41 = sum(septicemia_a40_a41, na.rm=TRUE),
malignant_neoplasms_c00_c97 = sum(malignant_neoplasms_c00_c97, na.rm=TRUE),
diabetes_mellitus_e10_e14 = sum(diabetes_mellitus_e10_e14, na.rm=TRUE),
alzheimer_disease_g30 = sum(alzheimer_disease_g30, na.rm=TRUE),
influenza_and_pneumonia_j10 = sum(influenza_and_pneumonia_j10, na.rm=TRUE),
chronic_lower_respiratory = sum(chronic_lower_respiratory, na.rm=TRUE),
other_diseases_of_respiratory = sum(other_diseases_of_respiratory, na.rm=TRUE),
nephritis_nephrotic_syndrome = sum(nephritis_nephrotic_syndrome, na.rm=TRUE),
symptoms_signs_and_abnormal = sum(symptoms_signs_and_abnormal, na.rm=TRUE),
diseases_of_heart_i00_i09 = sum(diseases_of_heart_i00_i09,na.rm=TRUE),
cerebrovascular_diseases = sum(cerebrovascular_diseases, na.rm=TRUE),
covid19_multiple_causes_of_death=sum(covid19_multiple_causes_of_death, na.rm=T),
covid19_underlying_cause_of_death=sum(covid19_underlying_cause_of_death, na.rm=T)) %>%
mutate(naturalcause_perc = round(naturalcause/allcause,3),
unnaturalcause_perc = round(unnaturalcause/allcause,3),
septicemia_perc = round(septicemia_a40_a41/allcause,3),
malignant_neoplasms_perc = round(malignant_neoplasms_c00_c97/allcause,3),
diabetes_mellitus_perc = round(diabetes_mellitus_e10_e14/allcause,3),
alzheimer_perc = round(alzheimer_disease_g30/allcause,3),
influenza_pneumonia_perc = round(influenza_and_pneumonia_j10/allcause,3),
chronic_lower_respiratory_perc = round(chronic_lower_respiratory/allcause,3),
other_respiratory_diseases_perc = round(other_diseases_of_respiratory/allcause,3),
nephritis_nephrotic_syndrome_perc = round(nephritis_nephrotic_syndrome/allcause,3),
symptoms_signs_and_abnormal_perc = round(symptoms_signs_and_abnormal/allcause,3),
heart_disease_perc = round(diseases_of_heart_i00_i09/allcause,3),
cerebrovascular_diseases_perc = round(cerebrovascular_diseases/allcause,3),
covid19_multiple_causes_of_death_perc=round(covid19_multiple_causes_of_death/allcause,3),
covid19_underlying_cause_of_death_perc=round(covid19_underlying_cause_of_death/allcause,3)
) %>%
ungroup()
causes_1420_long <- causes_1420_long %>%
pivot_longer(cols=3:ncol(causes_1420_long ),
names_to="type",
values_to="count") %>%
mutate(weekendingdate=ymd(weekendingdate)) %>%
left_join(weeks, by=c("weekendingdate"="end_date")) %>%
filter(WEEK <=15) %>%
left_join(state_names, by=c("jurisdiction_of_occurrence"= "state.name")) %>%
filter(grepl("_perc", type))
causes_1420_long20 <- causes_1420_long %>%
filter(SEASON=="2019-20")
causes_1420_long <- causes_1420_long %>%
filter(SEASON!="2019-20")
list_of_causes <- causes_1420_long %>%
pull(type) %>% unique()
for (i in 1:length(list_of_causes)) {
df20 <- causes_1420_long20 %>%
filter(type==list_of_causes[i]) %>%
filter(!is.na(state.abb))
df <- causes_1420_long %>%
filter(type==list_of_causes[i]) %>%
filter(!is.na(state.abb))
plotted <- ggplot(df20) +
geom_line(data=df, aes(x=WEEK, y=count, group=SEASON), color="grey", alpha=.7) +
geom_line(aes(x=WEEK, y=count), color="red") +
facet_geo(~state.abb, grid="us_state_grid1", scale="free_y")+
labs(title=list_of_causes[i])
print(plotted)
}



